import numpy as np
import pandas as pd
import csv

inpath = '../../20 Intermediate Files/'
outpath = '../model/input/'

######################################################################################
# industry-level data

# use the averages from 2004-2007
cols = {'sector_n':'sector_num',
        'sector':'sector_name',
        'delast_S18_usachn':'delast',
        'export_partic_0407':'partic',
        'exit_rate_0407':'exit',
        'entry_rate_0407':'entry',
        'cov_lexports_0407':'cv',
        'incumbent_premium_0407':'inc_prem'}
            
dfs = pd.read_stata(inpath + 'inputs_calibration_sectors.dta')[list(cols.keys())]\
       .rename(columns=cols)\
       .sort_values(by='sector_num',ascending=True)

dfs['sector_num'] = dfs.sector_num-1
dfs.loc[np.isnan(dfs.sector_num),'sector_num']=dfs.sector_num.max()+1
dfs['sector_num'] = dfs.sector_num.astype(int)

# use average of exit rate and entry rate, since they should be the same in a steady state
dfs['exit'] = (dfs['exit'] + dfs['entry'])/2
dfs.drop('entry',axis=1,inplace=True)

dfs[['sector_num','delast','partic','exit','inc_prem','cv']].to_csv(outpath + 'inputs_calibration_sector.csv',index=False,sep=' ',header=False)

######################################################################################
# time-invariant good-level data

cols = {'sitc':'good_name',
        'sector_n':'sector_num',
        'nntr_med':'nntr',
        'gap_med':'gap'}

dfg = pd.read_stata(inpath + 'inputs_calibration_g.dta')[list(cols.keys())]\
       .rename(columns=cols).reset_index().rename(columns={'index':'good_num'})

dfg['sector_num'] = dfg.sector_num-1
dfs['sector_num'] = dfs.sector_num.astype(int)

dfg[['good_num','sector_num','nntr','gap']].to_csv(outpath + 'inputs_calibration_g.csv',index=False,header=False,sep=' ')


######################################################################################
# time-series data for good-level MFN tariffs

cols = {'sitc':'good_name',
        'year':'year',
        'mfn_tariff':'mfn'}
dfgt = pd.read_stata(inpath + 'inputs_calibration_gt.dta')[list(cols.keys())]\
       .rename(columns=cols)

dfgt = dfgt[ (dfgt.year>=1974) & (dfgt.year<=2008) ]

# fill in missing data

def fillin(df,cols):  
    tmp = pd.MultiIndex.from_product([df[c].unique().tolist() for c in cols], names = cols)
    tmp2 = pd.DataFrame(index = tmp).reset_index()
    df2 = pd.merge(left=df,right=tmp2,how='right',on=cols,indicator=True)
    df2['_fillin'] = df2._merge=='right_only'
    df2.drop('_merge',axis=1,inplace=True)
    return df2

dfgt = fillin(dfgt,['good_name','year']).drop('_fillin',axis=1)
dfgt.sort_values(by=['good_name','year'],ascending=[True,True],inplace=True)
dfgt.reset_index(drop=True,inplace=True)

dfgt['mfn'] = dfgt.groupby('good_name')['mfn'].transform(lambda x: x.fillna(method='bfill'))
dfgt['mfn'] = dfgt.groupby('good_name')['mfn'].transform(lambda x: x.fillna(method='ffill'))
dfgt['mfn'] = dfgt.groupby('good_name')['mfn'].transform(lambda x: x.interpolate(method='nearest'))

# merge on numeric good indices from previous file
dfgt = pd.merge(left=dfgt,right=dfg[['good_num','good_name','nntr']],how='left',on='good_name')

dfgt.year = dfgt.year.astype(int)

dfgt[['good_num','year','mfn']].to_csv(outpath + 'inputs_calibration_gt.csv',index=False,header=False,sep=' ')

######################################################################################

df_lr = pd.read_stata(inpath + 'inputs_calibration_lrelast.dta')
